## Load libraries
library(covid19)
library(ggplot2)
library(lubridate)
library(dplyr)
library(ggplot2)
library(sp)
library(raster)
library(viridis)
library(ggthemes)
library(sf)
library(rnaturalearth)
library(rnaturalearthdata)
# Map data preparation
if(!'map.RData' %in% dir()){
esp1 <- getData(name = 'GADM', country = 'ESP', level = 1)
# Remove canary
esp1 <- esp1[esp1@data$NAME_1 != 'Islas Canarias',]
espf <- fortify(esp1, region = 'NAME_1')
# Standardize names
# Convert names
map_names <- esp1@data$NAME_1
data_names <- sort(unique(esp_df$ccaa))
names_df <- tibble(NAME_1 = c('Andalucía',
'Aragón',
'Cantabria',
'Castilla-La Mancha',
'Castilla y León',
'Cataluña',
'Ceuta y Melilla',
'Comunidad de Madrid',
'Comunidad Foral de Navarra',
'Comunidad Valenciana',
'Extremadura',
'Galicia',
'Islas Baleares',
'La Rioja',
'País Vasco',
'Principado de Asturias',
'Región de Murcia'),
ccaa = c('Andalucía',
'Aragón',
'Cantabria',
'CLM',
'CyL',
'Cataluña',
'Melilla',
'Madrid',
'Navarra',
'C. Valenciana',
'Extremadura',
'Galicia',
'Baleares',
'La Rioja',
'País Vasco',
'Asturias',
'Murcia'))
espf <- left_join(espf %>% dplyr::rename(NAME_1 = id), names_df)
centroids <- data.frame(coordinates(esp1))
names(centroids) <- c('long', 'lat')
centroids$NAME_1 <- esp1$NAME_1
centroids <- centroids %>% left_join(names_df)
# Get random sampling points
random_list <- list()
for(i in 1:nrow(esp1)){
message(i)
shp <- esp1[i,]
# bb <- bbox(shp)
this_ccaa <- esp1@data$NAME_1[i]
# xs <- runif(n = 500, min = bb[1,1], max = bb[1,2])
# ys <- runif(n = 500, min = bb[2,1], max = bb[2,2])
# random_points <- expand.grid(long = xs, lat = ys) %>%
# mutate(x = long,
# y = lat)
# coordinates(random_points) <- ~x+y
# proj4string(random_points) <- proj4string(shp)
# get ccaa
message('getting locations of randomly generated points')
# polys <- over(random_points,polygons(shp))
# polys <- as.numeric(polys)
random_points <- spsample(shp, n = 20000, type = 'random')
random_points <- data.frame(random_points)
random_points$NAME_1 <- this_ccaa
random_points <- left_join(random_points, names_df) %>% dplyr::select(-NAME_1)
random_list[[i]] <- random_points
}
random_points <- bind_rows(random_list)
random_points <- random_points %>% mutate(long = x,
lat = y)
save(espf,
esp1,
names_df,
centroids,
random_points,
file = 'map.RData')
} else {
load('map.RData')
}
# Define a function for adding zerio
add_zero <- function (x, n) {
x <- as.character(x)
adders <- n - nchar(x)
adders <- ifelse(adders < 0, 0, adders)
for (i in 1:length(x)) {
if (!is.na(x[i])) {
x[i] <- paste0(paste0(rep("0", adders[i]), collapse = ""),
x[i], collapse = "")
}
}
return(x)
}
options(scipen = '999')
make_map <- function(var = 'deaths',
data = NULL,
pop = FALSE,
pop_factor = 100000,
points = FALSE,
line_color = 'white',
add_names = T,
add_values = T,
text_size = 2.7){
if(is.null(data)){
data <- esp_df
}
left <- espf
right <- data[,c('ccaa', paste0(var, '_non_cum'))]
names(right)[ncol(right)] <- 'var'
right <- right %>% group_by(ccaa) %>% summarise(var = sum(var, na.rm = T))
if(pop){
right <- left_join(right, esp_pop)
right$var <- right$var / right$pop * pop_factor
}
map <- left_join(left, right)
if(points){
the_points <- centroids %>%
left_join(right)
g <- ggplot() +
geom_polygon(data = map,
aes(x = long,
y = lat,
group = group),
fill = 'black',
color = line_color,
lwd = 0.4, alpha = 0.8) +
geom_point(data = the_points,
aes(x = long,
y = lat,
size = var),
color = 'red',
alpha = 0.7) +
scale_size_area(name = '', max_size = 20)
} else {
# cols <- c('#008080','#70a494','#b4c8a8','#f6edbd','#edbb8a','#de8a5a','#ca562c')
cols <- RColorBrewer::brewer.pal(n = 8, name = 'Blues')
g <- ggplot(data = map,
aes(x = long,
y = lat,
group = group)) +
geom_polygon(aes(fill = var),
lwd = 0.3,
color = line_color) +
scale_fill_gradientn(name = '',
colours = cols)
# scale_fill_viridis(name = '' ,option = 'magma',
# direction = -1)
}
# Add names?
if(add_names){
centy <- centroids %>% left_join(right)
if(add_values){
centy$label <- paste0(centy$ccaa, '\n(', round(centy$var, digits = 2), ')')
} else {
centy$label <- centy$ccaa
}
g <- g +
geom_text(data = centy,
aes(x = long,
y = lat,
label = label,
group = ccaa),
alpha = 0.7,
size = text_size)
}
g +
theme_map() +
labs(subtitle = paste0('Data as of ', max(data$date))) +
theme(legend.position = 'right')
}
make_dot_map <- function(var = 'deaths',
date = NULL,
pop = FALSE,
pop_factor = 100,
point_factor = 1,
points = FALSE,
point_color = 'darkred',
point_size = 0.6,
point_alpha = 0.5){
if(is.null(date)){
the_date <- max(esp_df$date)
} else {
the_date <- date
}
right <- esp_df[esp_df$date == the_date,c('ccaa', var)]
names(right)[ncol(right)] <- 'var'
if(pop){
right <- left_join(right, esp_pop)
right$var <- right$var / right$pop * pop_factor
}
map_data <- esp1@data %>%
left_join(names_df) %>%
left_join(right)
map_data$var <- map_data$var / point_factor
out_list <- list()
for(i in 1:nrow(map_data)){
sub_data <- map_data[i,]
this_value = round(sub_data$var)
if(this_value >= 1){
this_ccaa = sub_data$ccaa
# get some points
sub_points <- random_points %>% filter(ccaa == this_ccaa)
sampled_points <- sub_points %>% dplyr::sample_n(this_value)
out_list[[i]] <- sampled_points
}
}
the_points <- bind_rows(out_list)
g <- ggplot() +
geom_polygon(data = espf,
aes(x = long,
y = lat,
group = group),
fill = 'white',
color = 'black',
lwd = 0.4, alpha = 0.8) +
geom_point(data = the_points,
aes(x = long,
y = lat),
color = point_color,
size = point_size,
alpha = point_alpha)
g +
theme_map() +
labs(subtitle = paste0('Data as of ', max(esp_df$date)))
}
make_map(var = 'deaths',
points = T) +
labs(title = 'Number of deaths')
if(!dir.exists('high_quality')){
dir.create('high_quality')
}
ggsave('high_quality/points_death_absolute.pdf', width = 8, height = 6)
make_map(var = 'deaths',
line_color = 'darkgrey',
points = F) +
labs(title = 'Number of deaths')
ggsave('high_quality/polygons_death_absolute.pdf', width = 8, height = 6)
make_map(var = 'deaths', pop = TRUE, points = T) +
labs(title = 'Number of deaths per 100,000')
ggsave('high_quality/points_death_adjusted.pdf', width = 8, height = 6)
make_map(var = 'deaths', pop = TRUE, points = F, line_color = 'darkgrey') +
labs(title = 'Number of deaths per 100,000')
ggsave('high_quality/polygons_death_adjusted.pdf', width = 8, height = 6)
make_dot_map(var = 'deaths', point_size = 0.15) +
labs(title = 'COVID-19 deaths: 1 point = 1 death\nImportant: points are random within each CCAA; do not reflect exact location')
ggsave('high_quality/point_per_death.pdf', width = 8, height = 6)
make_map(var = 'cases',
points = T) +
labs(title = 'Number of confirmed cases')
ggsave('high_quality/points_case_absolute.pdf', width = 8, height = 6)
make_map(var = 'cases',
line_color = 'darkgrey',
points = F) +
labs(title = 'Number of confirmed cases')
ggsave('high_quality/polygon_case_absolute.pdf', width = 8, height = 6)
make_map(var = 'cases', pop = TRUE, points = T) +
labs(title = 'Number of confirmed cases per 100,000')
ggsave('high_quality/points_case_adjusted.pdf', width = 8, height = 6)
make_map(var = 'cases', pop = TRUE, points = F,
line_color = 'darkgrey') +
labs(title = 'Number of confirmed cases per 100,000')
ggsave('high_quality/polygons_case_adjusted.pdf', width = 8, height = 6)
keep_dates <- as.Date(seq((max(esp_df$date)-6), max(esp_df$date), by = 1))
data <- esp_df %>% filter(date %in% keep_dates)
make_map(var = 'cases', data = data, pop = TRUE, points = F,
line_color = 'darkgrey') +
labs(title = 'Number of confirmed cases per 100,000',
caption = paste0('Cumulative incidence over the week from ',
min(keep_dates), ' through ',
max(keep_dates)))
ggsave('high_quality/polygons_case_adjusted_week.pdf', width = 8, height = 6)
make_dot_map(var = 'cases',
point_size = 0.05, point_alpha = 0.5, point_factor = 10) +
labs(title = 'COVID-19 cases: 1 point = 10 cases\nImportant: points are random within each CCAA; do not reflect exact location')
ggsave('high_quality/point_per_10_cases.pdf', width = 8, height = 6)
Calculate cumulative incidence per 1.000.000 per week
keep_dates <- as.Date(seq((max(esp_df$date)-6), max(esp_df$date), by = 1))
x <- esp_df %>%
filter(date %in% keep_dates) %>%
group_by(ccaa) %>%
summarise(incident_cases_over_7_days = sum(cases_non_cum)) %>%
ungroup %>%
left_join(esp_pop) %>%
mutate(incident_cases_over_7_days_per_10e4 = (incident_cases_over_7_days / pop) * 10e4) %>%
arrange(desc(incident_cases_over_7_days_per_10e4))
kable(x)
| ccaa | incident_cases_over_7_days | pop | incident_cases_over_7_days_per_10e4 |
|---|---|---|---|
| La Rioja | 963 | 316798 | 303.97919 |
| CLM | 4785 | 2032863 | 235.38232 |
| Madrid | 14907 | 6663394 | 223.71482 |
| Navarra | 1062 | 654214 | 162.33220 |
| Cataluña | 11006 | 7675217 | 143.39660 |
| CyL | 3335 | 2399548 | 138.98451 |
| País Vasco | 2888 | 2207776 | 130.81037 |
| Aragón | 1374 | 1319291 | 104.14685 |
| Galicia | 2805 | 2699499 | 103.90817 |
| Ceuta | 62 | 84777 | 73.13304 |
| Cantabria | 418 | 581078 | 71.93527 |
| Extremadura | 591 | 1067710 | 55.35211 |
| Asturias | 517 | 1022800 | 50.54752 |
| C. Valenciana | 2400 | 5003769 | 47.96384 |
| Andalucía | 3619 | 8414240 | 43.01042 |
| Melilla | 35 | 86487 | 40.46851 |
| Baleares | 335 | 1149460 | 29.14412 |
| Murcia | 363 | 1493898 | 24.29885 |
| Canarias | 497 | 2153389 | 23.07990 |